In [2]:
%matplotlib inline

# Imports
import datetime
import gc
import os

import matplotlib.pyplot as plt
import numpy
import pandas
import scipy.stats
import sklearn.dummy
import sklearn.ensemble
import sklearn.metrics
import statsmodels.stats.proportion

# seaborn
import seaborn
seaborn.set()
seaborn.set_style("darkgrid")

# Project imports
from legacy_model import *

In [3]:
# Get raw data
raw_data = get_raw_scdb_data("../data/input/SCDB_Legacy_01_justiceCentered_Citation.csv")

# Get feature data
if os.path.exists("../data/output/feature_data.hdf.gz"):
    print("Loading from HDF5 cache")
    feature_df = pandas.read_hdf("../data/output/feature_data.hdf.gz", "root")
else:
    # Process
    feature_df = preprocess_raw_data(raw_data, include_direction=True)
    
    # Write out feature data
    feature_df.to_hdf("../data/output/feature_data.hdf.gz", "root", complevel=6, complib="zlib")


Loading from HDF5 cache

In [4]:
# Downcast features to float16 to reduce memory usage
feature_df = feature_df.astype(numpy.float16)

In [30]:
# Remove term and natural court indicator features
nonterm_features = [f for f in feature_df.columns if not f.startswith("term_") and not f.startswith("natural_court")]
original_feature_df = feature_df.copy()
feature_df = original_feature_df.loc[:, nonterm_features].copy()
gc.collect()


Out[30]:
223

In [32]:
# Output some diagnostics on features
print(raw_data.shape)
print(feature_df.shape)
assert raw_data.shape[0] == feature_df.shape[0]


(249793, 80)
(249793, 1396)

In [33]:
# Reset output file timestamp per run
file_timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")

# Reset seed per run
numpy.random.seed(0)

# Setup training time period
dummy_window = 10
min_training_years = 25
term_range = range(raw_data["term"].min() + min_training_years,
                   raw_data["term"].max() + 1)

# Set growing random forest parameters
# Number of trees to grow per term
trees_per_term = 1

# Number of trees to begin with
initial_trees = min_training_years * trees_per_term
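
# Note: with these settings the forest starts at 25 trees and gains
# trees_per_term new trees each term; after a natural court change the
# forest is rebuilt from scratch at the size it would otherwise have reached.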

# Setup model
m = None
term_count = 0
feature_importance_df = pandas.DataFrame()

for term in term_range:
    # Diagnostic output
    print("Term: {0}".format(term))
    term_count += 1
    
    # Setup train and test periods
    train_index = (raw_data.loc[:, "term"] < term).values
    dummy_train_index = ((raw_data.loc[:, "term"] < term) & (raw_data.loc[:, "term"] >= (term-dummy_window))).values
    test_index = (raw_data.loc[:, "term"] == term).values
    if test_index.sum() == 0:
        continue
    
    # Setup train data
    feature_data_train = feature_df.loc[train_index, :]
    target_data_train = (raw_data.loc[train_index, "justice_outcome_disposition"]).astype(int)
    target_data_weights = target_data_train.value_counts() / target_data_train.shape[0]
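    # Note: these are the raw class frequencies, passed as class_weight below,
    # so more frequent outcomes receive proportionally larger weights.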

    # Setup test data
    feature_data_test = feature_df.loc[test_index, :]
    target_data_test = (raw_data.loc[test_index, "justice_outcome_disposition"]).astype(int)
    
    # Check whether the natural court (the set of sitting justices) has changed
    if set(raw_data.loc[raw_data.loc[:, "term"] == (term-1), "naturalCourt"].unique()) != \
        set(raw_data.loc[raw_data.loc[:, "term"] == (term), "naturalCourt"].unique()):
        # Natural court change; discard the model so a fresh forest is grown below
        print("Natural court change; rebuilding with {0} trees".format(initial_trees + (term_count * trees_per_term)))
        m = None

    # Build or grow a model depending on initial/reset condition
    if m is None:
        # Grow an initial forest
        m = sklearn.ensemble.RandomForestClassifier(n_estimators=initial_trees + (term_count * trees_per_term),
                                                    class_weight=target_data_weights.to_dict(),
                                                    warm_start=True,
                                                    n_jobs=-1)
    else:
        # Grow the forest by increasing the number of trees (requires warm_start=True)
        m.set_params(n_estimators=initial_trees + (term_count * trees_per_term))
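        # Note: with warm_start=True, the next fit() keeps the existing trees
        # and only trains the newly added estimators on the expanded training
        # window; previously grown trees are not refit.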

    # Fit the forest model
    m.fit(feature_data_train,
          target_data_train)
    #print(m.get_params())
    
    # Record feature weights
    current_feature_importance_df = pandas.DataFrame(list(zip(feature_df.columns, m.feature_importances_)),
                                                     columns=["feature", "importance"])
    current_feature_importance_df.loc[:, "term"] = term
    if feature_importance_df.shape[0] == 0:
        feature_importance_df = current_feature_importance_df.copy()
    else:
        feature_importance_df = pandas.concat([feature_importance_df, current_feature_importance_df],
                                              ignore_index=True)

    # Fit the "dummy" model
    d = sklearn.dummy.DummyClassifier(strategy="most_frequent")
    d.fit(feature_df.loc[dummy_train_index, :],
          (raw_data.loc[dummy_train_index, "justice_outcome_disposition"]).astype(int))
    
    # Perform forest predictions
    raw_data.loc[test_index, "rf_predicted"] = m.predict(feature_data_test)
    
    # Store scores per class
    scores = m.predict_proba(feature_data_test)
    raw_data.loc[test_index, "rf_predicted_score_other"] = scores[:, 0]
    raw_data.loc[test_index, "rf_predicted_score_affirm"] = scores[:, 1]
    raw_data.loc[test_index, "rf_predicted_score_reverse"] = scores[:, 2]
    
    # Store dummy predictions
    raw_data.loc[test_index, "dummy_predicted"] = d.predict(feature_data_test)
    
    # Clear per-term frames to free memory
    del feature_data_train
    del feature_data_test
    del target_data_train
    del target_data_test
    gc.collect()


Term: 1816
Term: 1817
Term: 1818
Term: 1819
Term: 1820
Term: 1821
Term: 1822
Term: 1823
Term: 1824
Natural court change; rebuilding with 34 trees
Term: 1825
Term: 1826
Term: 1827
Natural court change; rebuilding with 37 trees
Term: 1828
Term: 1829
Term: 1830
Natural court change; rebuilding with 40 trees
Term: 1831
Term: 1832
Term: 1833
Term: 1834
Term: 1835
Natural court change; rebuilding with 45 trees
Term: 1836
Term: 1837
Natural court change; rebuilding with 47 trees
Term: 1838
Natural court change; rebuilding with 48 trees
Term: 1839
Term: 1840
Term: 1841
Term: 1842
Natural court change; rebuilding with 52 trees
Term: 1843
Term: 1844
Term: 1845
Natural court change; rebuilding with 55 trees
Term: 1846
Natural court change; rebuilding with 56 trees
Term: 1847
Natural court change; rebuilding with 57 trees
Term: 1848
Term: 1849
Term: 1850
Natural court change; rebuilding with 60 trees
Term: 1851
Natural court change; rebuilding with 61 trees
Term: 1852
Term: 1853
Natural court change; rebuilding with 63 trees
Term: 1854
Term: 1855
Term: 1856
Term: 1857
Natural court change; rebuilding with 67 trees
Term: 1858
Natural court change; rebuilding with 68 trees
Term: 1859
Term: 1860
Term: 1861
Natural court change; rebuilding with 71 trees
Term: 1862
Natural court change; rebuilding with 72 trees
Term: 1863
Natural court change; rebuilding with 73 trees
Term: 1864
Natural court change; rebuilding with 74 trees
Term: 1865
Term: 1866
Term: 1867
Term: 1868
Term: 1869
Natural court change; rebuilding with 79 trees
Term: 1870
Natural court change; rebuilding with 80 trees
Term: 1871
Term: 1872
Natural court change; rebuilding with 82 trees
Term: 1873
Natural court change; rebuilding with 83 trees
Term: 1874
Natural court change; rebuilding with 84 trees
Term: 1875
Term: 1876
Term: 1877
Natural court change; rebuilding with 87 trees
Term: 1878
Natural court change; rebuilding with 88 trees
Term: 1879
Term: 1880
Natural court change; rebuilding with 90 trees
Term: 1881
Natural court change; rebuilding with 91 trees
Term: 1882
Natural court change; rebuilding with 92 trees
Term: 1883
Term: 1884
Term: 1885
Term: 1886
Term: 1887
Natural court change; rebuilding with 97 trees
Term: 1888
Natural court change; rebuilding with 98 trees
Term: 1889
Natural court change; rebuilding with 99 trees
Term: 1890
Natural court change; rebuilding with 100 trees
Term: 1891
Natural court change; rebuilding with 101 trees
Term: 1892
Natural court change; rebuilding with 102 trees
Term: 1893
Natural court change; rebuilding with 103 trees
Term: 1894
Natural court change; rebuilding with 104 trees
Term: 1895
Natural court change; rebuilding with 105 trees
Term: 1896
Natural court change; rebuilding with 106 trees
Term: 1897
Natural court change; rebuilding with 107 trees
Term: 1898
Natural court change; rebuilding with 108 trees
Term: 1899
Term: 1900
Term: 1901
Term: 1902
Natural court change; rebuilding with 112 trees
Term: 1903
Natural court change; rebuilding with 113 trees
Term: 1904
Term: 1905
Term: 1906
Natural court change; rebuilding with 116 trees
Term: 1907
Natural court change; rebuilding with 117 trees
Term: 1908
Term: 1909
Natural court change; rebuilding with 119 trees
Term: 1910
Natural court change; rebuilding with 120 trees
Term: 1911
Natural court change; rebuilding with 121 trees
Term: 1912
Natural court change; rebuilding with 122 trees
Term: 1913
Term: 1914
Natural court change; rebuilding with 124 trees
Term: 1915
Natural court change; rebuilding with 125 trees
Term: 1916
Natural court change; rebuilding with 126 trees
Term: 1917
Term: 1918
Term: 1919
Term: 1920
Term: 1921
Natural court change; rebuilding with 131 trees
Term: 1922
Natural court change; rebuilding with 132 trees
Term: 1923
Natural court change; rebuilding with 133 trees
Term: 1924
Natural court change; rebuilding with 134 trees
Term: 1925
Natural court change; rebuilding with 135 trees
Term: 1926
Term: 1927
Term: 1928
Term: 1929
Natural court change; rebuilding with 139 trees
Term: 1930
Natural court change; rebuilding with 140 trees
Term: 1931
Natural court change; rebuilding with 141 trees
Term: 1932
Natural court change; rebuilding with 142 trees
Term: 1933
Term: 1934
Term: 1935
Term: 1936
Term: 1937
Natural court change; rebuilding with 147 trees
Term: 1938
Natural court change; rebuilding with 148 trees
Term: 1939
Natural court change; rebuilding with 149 trees
Term: 1940
Natural court change; rebuilding with 150 trees
Term: 1941
Natural court change; rebuilding with 151 trees
Term: 1942
Term: 1943
Term: 1944
Term: 1945
Natural court change; rebuilding with 155 trees
Term: 1946
Natural court change; rebuilding with 156 trees
Term: 1947
Term: 1948
Term: 1949
Natural court change; rebuilding with 159 trees
Term: 1950
Term: 1951
Term: 1952
Term: 1953
Natural court change; rebuilding with 163 trees
Term: 1954
Natural court change; rebuilding with 164 trees
Term: 1955
Natural court change; rebuilding with 165 trees
Term: 1956
Natural court change; rebuilding with 166 trees
Term: 1957
Natural court change; rebuilding with 167 trees
Term: 1958
Natural court change; rebuilding with 168 trees
Term: 1959
Natural court change; rebuilding with 169 trees
Term: 1960
Term: 1961
Natural court change; rebuilding with 171 trees
Term: 1962
Natural court change; rebuilding with 172 trees
Term: 1963
Term: 1964
Term: 1965
Natural court change; rebuilding with 175 trees
Term: 1966
Term: 1967
Natural court change; rebuilding with 177 trees
Term: 1968
Natural court change; rebuilding with 178 trees
Term: 1969
Natural court change; rebuilding with 179 trees
Term: 1970
Natural court change; rebuilding with 180 trees
Term: 1971
Natural court change; rebuilding with 181 trees
Term: 1972
Natural court change; rebuilding with 182 trees
Term: 1973
Term: 1974
Term: 1975
Natural court change; rebuilding with 185 trees
Term: 1976
Natural court change; rebuilding with 186 trees
Term: 1977
Term: 1978
Term: 1979
Term: 1980
Term: 1981
Natural court change; rebuilding with 191 trees
Term: 1982
Term: 1983
Term: 1984
Term: 1985
Term: 1986
Natural court change; rebuilding with 196 trees
Term: 1987
Natural court change; rebuilding with 197 trees
Term: 1988
Natural court change; rebuilding with 198 trees
Term: 1989
Term: 1990
Natural court change; rebuilding with 200 trees
Term: 1991
Natural court change; rebuilding with 201 trees
Term: 1992
Term: 1993
Natural court change; rebuilding with 203 trees
Term: 1994
Natural court change; rebuilding with 204 trees
Term: 1995
Term: 1996
Term: 1997
Term: 1998
Term: 1999
Term: 2000
Term: 2001
Term: 2002
Term: 2003
Term: 2004
Term: 2005
Natural court change; rebuilding with 215 trees
Term: 2006
Natural court change; rebuilding with 216 trees
Term: 2007
Term: 2008
Term: 2009
Natural court change; rebuilding with 219 trees
Term: 2010
Natural court change; rebuilding with 220 trees
Term: 2011
Term: 2012
Term: 2013
Term: 2014

In [34]:
# Evaluation range
evaluation_index = raw_data.loc[:, "term"].isin(term_range)
target_actual = (raw_data.loc[evaluation_index, "justice_outcome_disposition"]).astype(int)
target_predicted = raw_data.loc[evaluation_index, "rf_predicted"].astype(int)
target_dummy = raw_data.loc[evaluation_index, "dummy_predicted"].astype(int)
raw_data.loc[:, "rf_correct"] = numpy.nan
raw_data.loc[:, "dummy_correct"] = numpy.nan
raw_data.loc[evaluation_index, "rf_correct"] = (target_actual == target_predicted).astype(float)
raw_data.loc[evaluation_index, "dummy_correct"] = (target_actual == target_dummy).astype(float)

# Setup reverse testing: collapse outcomes to reverse (1) vs. not-reverse (0, i.e. affirm or other)
reverse_target_actual = (raw_data.loc[evaluation_index, "justice_outcome_disposition"] > 0).astype(int)
reverse_target_predicted = (raw_data.loc[evaluation_index, "rf_predicted"] > 0).astype(int)
reverse_target_dummy = (raw_data.loc[evaluation_index, "dummy_predicted"] > 0).astype(int)
raw_data.loc[:, "rf_reverse_correct"] = numpy.nan
raw_data.loc[:, "dummy_reverse_correct"] = numpy.nan
raw_data.loc[evaluation_index, "rf_reverse_correct"] = (reverse_target_actual == reverse_target_predicted).astype(float)
raw_data.loc[evaluation_index, "dummy_reverse_correct"] = (reverse_target_actual == reverse_target_dummy).astype(float)

Justice Accuracy - Other/Affirm/Reverse


In [35]:
# Compare model
print("RF model")
print("="*32)
print(sklearn.metrics.classification_report(target_actual, target_predicted))
print(sklearn.metrics.confusion_matrix(target_actual, target_predicted))
print(sklearn.metrics.accuracy_score(target_actual, target_predicted))
print("="*32)
print("")

# Dummy model
print("Dummy model")
print("="*32)
print(sklearn.metrics.classification_report(target_actual, target_dummy))
print(sklearn.metrics.confusion_matrix(target_actual, target_dummy))
print(sklearn.metrics.accuracy_score(target_actual, target_dummy))
print("="*32)
print("")


RF model
================================
             precision    recall  f1-score   support

         -1       0.78      0.62      0.69     39540
          0       0.62      0.72      0.67    113666
          1       0.61      0.54      0.57     93569

avg / total       0.64      0.64      0.64    246775

[[24532  9995  5013]
 [ 4329 82379 26958]
 [ 2441 40492 50636]]
0.638423665282
================================

Dummy model
================================
             precision    recall  f1-score   support

         -1       0.33      0.01      0.02     39540
          0       0.52      0.77      0.62    113666
          1       0.57      0.46      0.51     93569

avg / total       0.50      0.53      0.48    246775

[[  495 31931  7114]
 [  618 87102 25946]
 [  398 49888 43283]]
0.530361665485
================================

Justice Accuracy - Reverse/Not-Reverse


In [36]:
# Compare model
print("RF model")
print("="*32)
print(sklearn.metrics.classification_report(reverse_target_actual, reverse_target_predicted))
print(sklearn.metrics.confusion_matrix(reverse_target_actual, reverse_target_predicted))
print(sklearn.metrics.accuracy_score(reverse_target_actual, reverse_target_predicted))
print("="*32)
print("")

# Dummy model
print("Dummy model")
print("="*32)
print(sklearn.metrics.classification_report(reverse_target_actual, reverse_target_dummy))
print(sklearn.metrics.confusion_matrix(reverse_target_actual, reverse_target_dummy))
print(sklearn.metrics.accuracy_score(reverse_target_actual, reverse_target_dummy))
print("="*32)
print("")


RF model
================================
             precision    recall  f1-score   support

          0       0.74      0.79      0.76    153206
          1       0.61      0.54      0.57     93569

avg / total       0.69      0.70      0.69    246775

[[121235  31971]
 [ 42933  50636]]
0.696468442914
================================

Dummy model
================================
             precision    recall  f1-score   support

          0       0.70      0.78      0.74    153206
          1       0.57      0.46      0.51     93569

avg / total       0.65      0.66      0.65    246775

[[120146  33060]
 [ 50286  43283]]
0.662259142944
================================

Justice Accuracy - Baseline and RF, Reverse/Not-Reverse


In [37]:
# Setup time series
rf_correct_ts = raw_data.loc[evaluation_index, :].groupby("term")["rf_correct"].mean()
dummy_correct_ts = raw_data.loc[evaluation_index, :].groupby("term")["dummy_correct"].mean()
rf_reverse_correct_ts = raw_data.loc[evaluation_index, :].groupby("term")["rf_reverse_correct"].mean()
dummy_reverse_correct_ts = raw_data.loc[evaluation_index, :].groupby("term")["dummy_reverse_correct"].mean()


# Plot all accuracies
f = plt.figure(figsize=(16, 12))
plt.plot(rf_reverse_correct_ts.index, rf_reverse_correct_ts,
         marker='o', alpha=0.75)
plt.plot(dummy_reverse_correct_ts.index, dummy_reverse_correct_ts,
         marker='>', alpha=0.75)
plt.legend(('Random forest', 'Dummy'))


Out[37]:
<matplotlib.legend.Legend at 0x7fd11ae8e2e8>

Justice Accuracy - Spread between Baseline and RF


In [38]:
# Setup time series
rf_spread_ts = rf_reverse_correct_ts - dummy_reverse_correct_ts

# Plot all accuracies
f = plt.figure(figsize=(16, 12))
plt.bar(rf_spread_ts.index, rf_spread_ts,
        alpha=0.75)
plt.xlabel("Term")
plt.ylabel("Spread (%)")
plt.title("Spread over dummy model for justice accuracy")


Out[38]:
<matplotlib.text.Text at 0x7fd11ade27b8>

Statistical Tests - Other/Affirm/Reverse


In [39]:
# Output stats
print("t-test:")
print("Uncalibrated:")
print(scipy.stats.ttest_rel(rf_correct_ts.values,
                            dummy_correct_ts.values))

print("=" * 16)
print("ranksum-test:")
print("Uncalibrated:")
print(scipy.stats.ranksums(rf_correct_ts.values,
                           dummy_correct_ts.values))

print("=" * 16)
print("Binomial:")
print(statsmodels.stats.proportion.binom_test(raw_data.loc[evaluation_index, "rf_correct"].sum(),
                                              raw_data.loc[evaluation_index, "rf_correct"].shape[0],
                                              raw_data.loc[evaluation_index, "dummy_correct"].mean(),
                                              alternative="larger"))


t-test:
Uncalibrated:
Ttest_relResult(statistic=20.704581588122593, pvalue=1.9808656817949511e-51)
================
ranksum-test:
Uncalibrated:
RanksumsResult(statistic=12.383550419032909, pvalue=3.2082945542388788e-35)
================
Binomial:
0.0

Statistical Tests - Reverse/Not-Reverse


In [40]:
# Output stats
print("t-test:")
print("Uncalibrated:")
print(scipy.stats.ttest_rel(rf_reverse_correct_ts.values,
                            dummy_reverse_correct_ts.values))

print("=" * 16)
print("ranksum-test:")
print("Uncalibrated:")
print(scipy.stats.ranksums(rf_reverse_correct_ts.values,
                           dummy_reverse_correct_ts.values))

print("=" * 16)
print("Binomial:")
print(statsmodels.stats.proportion.binom_test(raw_data.loc[evaluation_index, "rf_reverse_correct"].sum(),
                                              raw_data.loc[evaluation_index, "rf_reverse_correct"].shape[0],
                                              raw_data.loc[evaluation_index, "dummy_reverse_correct"].mean(),
                                              alternative="larger"))


t-test:
Uncalibrated:
Ttest_relResult(statistic=5.2463811793715616, pvalue=3.9722839040620391e-07)
================
ranksum-test:
Uncalibrated:
RanksumsResult(statistic=2.4222433771781824, pvalue=0.015425016079385817)
================
Binomial:
4.91098781526e-288

Sample Feature Weights


In [41]:
# Feature importance
last_feature_importance_df = pandas.DataFrame(list(zip(feature_df.columns, m.feature_importances_)),
                                              columns=["feature", "importance"])
last_feature_importance_df.sort_values(["importance"], ascending=False).head(10)


Out[41]:
feature importance
1394 justice_previous_lc_direction_diff 0.031447
1395 justice_cumulative_lc_direction_diff 0.029566
26 decision_delay 0.027278
1384 justice_previous_court_action 0.021945
1390 justice_previous_court_agreement 0.021381
1385 justice_cumulative_court_action 0.020187
1382 justice_previous_action 0.020122
1391 justice_cumulative_court_agreement 0.019942
1387 justice_cumulative_court_action_diff 0.019093
1383 justice_cumulative_action 0.018095

Case Level Analysis


In [42]:
# Get outcomes as reverse/not-reverse for real data
raw_data.loc[:, "justice_outcome_reverse"] = (raw_data.loc[:, "justice_outcome_disposition"] > 0).astype(int)
raw_data.loc[:, "case_outcome_reverse"] = (raw_data.loc[:, "case_outcome_disposition"] > 0).astype(int)

In [43]:
# Store reverse predictions
raw_data.loc[evaluation_index, "rf_predicted_reverse"] = (raw_data.loc[evaluation_index, "rf_predicted"] > 0).astype(int)
raw_data.loc[evaluation_index, "dummy_predicted_reverse"] = (raw_data.loc[evaluation_index, "dummy_predicted"] > 0).astype(int)

# Group by case
rf_predicted_case = (raw_data.loc[evaluation_index, :]\
    .groupby("docketId")["rf_predicted_reverse"].mean() >= 0.5).astype(int)

dummy_predicted_case = (raw_data.loc[evaluation_index, :]\
    .groupby("docketId")["dummy_predicted_reverse"].mean() >= 0.5).astype(int)

actual_case = (raw_data.loc[evaluation_index, :]\
    .groupby("docketId")["case_outcome_reverse"].mean() > 0).astype(int)

In [44]:
# Setup case dataframe
case_data = pandas.DataFrame(rf_predicted_case).join(dummy_predicted_case).join(actual_case)
case_data = case_data.join(raw_data.groupby("docketId")[["term", "naturalCourt"]].mean().astype(int))

# Setup correct columns
case_data.loc[:, "rf_correct_case"] = numpy.nan
case_data.loc[:, "dummy_correct_case"] = numpy.nan
case_data.loc[:, "rf_correct_case"] = (case_data.loc[:, "rf_predicted_reverse"] == case_data.loc[:, "case_outcome_reverse"])\
    .astype(int)
case_data.loc[:, "dummy_correct_case"] = (case_data.loc[:, "dummy_predicted_reverse"] == case_data.loc[:, "case_outcome_reverse"])\
    .astype(int)

# Join back onto raw data
case_data.loc[:, "docketId"] = case_data.index
raw_data = raw_data.join(case_data.loc[:, ["docketId", "rf_correct_case", "dummy_correct_case"]], on="docketId", rsuffix="_case")

Case Accuracy


In [45]:
# Output comparison
# Evaluation range
evaluation_index = case_data.loc[:, "term"].isin(term_range)
target_actual = case_data.loc[evaluation_index, "case_outcome_reverse"]
target_predicted = case_data.loc[evaluation_index, "rf_predicted_reverse"]
target_dummy = case_data.loc[evaluation_index, "dummy_predicted_reverse"]

# Compare model
print("RF model")
print("="*32)
print(sklearn.metrics.classification_report(target_actual, target_predicted))
print(sklearn.metrics.confusion_matrix(target_actual, target_predicted))
print(sklearn.metrics.accuracy_score(target_actual, target_predicted))
print("="*32)
print("")

# Dummy model
print("Dummy model")
print("="*32)
print(sklearn.metrics.classification_report(target_actual, target_dummy))
print(sklearn.metrics.confusion_matrix(target_actual, target_dummy))
print(sklearn.metrics.accuracy_score(target_actual, target_dummy))
print("="*32)
print("")


RF model
================================
             precision    recall  f1-score   support

          0       0.73      0.80      0.76     16740
          1       0.65      0.57      0.61     11340

avg / total       0.70      0.70      0.70     28080

[[13330  3410]
 [ 4919  6421]]
0.703383190883
================================

Dummy model
================================
             precision    recall  f1-score   support

          0       0.69      0.81      0.75     16740
          1       0.63      0.47      0.54     11340

avg / total       0.67      0.67      0.66     28080

[[13586  3154]
 [ 5974  5366]]
0.674928774929
================================

Case Accuracy (last century)


In [46]:
# Output comparison
# Evaluation range: restrict to the last 100 terms
last_century = case_data["term"].drop_duplicates().sort_values().tail(100)
evaluation_index = case_data.loc[:, "term"].isin(last_century)
target_actual = case_data.loc[evaluation_index, "case_outcome_reverse"]
target_predicted = case_data.loc[evaluation_index, "rf_predicted_reverse"]
target_dummy = case_data.loc[evaluation_index, "dummy_predicted_reverse"]

# Compare model
print("RF model")
print("="*32)
print(sklearn.metrics.classification_report(target_actual, target_predicted))
print(sklearn.metrics.confusion_matrix(target_actual, target_predicted))
print(sklearn.metrics.accuracy_score(target_actual, target_predicted))
print("="*32)
print("")

# Dummy model
print("Dummy model")
print("="*32)
print(sklearn.metrics.classification_report(target_actual, target_dummy))
print(sklearn.metrics.confusion_matrix(target_actual, target_dummy))
print(sklearn.metrics.accuracy_score(target_actual, target_dummy))
print("="*32)
print("")


Case Accuracy Time Series


In [48]:
# Setup time series
case_evaluation_index = ~case_data.loc[:, "rf_correct_case"].isnull()
rf_correct_case_ts = case_data.loc[case_evaluation_index, :].groupby("term")["rf_correct_case"].mean()
dummy_correct_case_ts = case_data.loc[case_evaluation_index, :].groupby("term")["dummy_correct_case"].mean()

# Plot all accuracies
f = plt.figure(figsize=(16, 12))
plt.plot(rf_correct_case_ts.index, rf_correct_case_ts,
         marker='o', alpha=0.75)
plt.plot(dummy_correct_case_ts.index, dummy_correct_case_ts,
         marker='>', alpha=0.75)
plt.legend(('Random forest', 'Dummy'))


Out[48]:
<matplotlib.legend.Legend at 0x7fd11aa9e5f8>

In [49]:
# Setup time series
rf_spread_case_ts = rf_correct_case_ts - dummy_correct_case_ts

# Plot all accuracies
f = plt.figure(figsize=(16, 12))
plt.bar(rf_spread_case_ts.index, rf_spread_case_ts,
        alpha=0.75)
plt.xlabel("Term")
plt.ylabel("Spread (%)")
plt.title("Spread over dummy model for case accuracy")


Out[49]:
<matplotlib.text.Text at 0x7fd11a9ede10>

Cumulative Term Win/Loss


In [50]:
# Setup time series
rf_spread_case_dir_ts = numpy.sign(rf_spread_case_ts).expanding(min_periods=1).sum()
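
# Note: numpy.sign() marks each term +1 where the RF beat the dummy and -1
# where it lost, so the expanding sum is the net number of terms "won".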

# Plot all accuracies
f = plt.figure(figsize=(16, 12))
plt.plot(rf_spread_case_dir_ts.index, rf_spread_case_dir_ts,
        alpha=0.75)


Out[50]:
[<matplotlib.lines.Line2D at 0x7fd11a702f60>]

Statistical Tests - Case Accuracy (all time)


In [52]:
# Output stats
print("t-test:")
print("Uncalibrated:")
print(scipy.stats.ttest_rel(rf_correct_case_ts.values,
                            dummy_correct_case_ts.values))

print("=" * 16)
print("ranksum-test:")
print("Uncalibrated:")
print(scipy.stats.ranksums(rf_correct_case_ts.values,
                           dummy_correct_case_ts.values))

print("=" * 16)
print("Binomial:")
print(statsmodels.stats.proportion.binom_test(case_data["rf_correct_case"].sum(),
                                              case_data["rf_correct_case"].shape[0],
                                              case_data["dummy_correct_case"].mean(),
                                              alternative="larger"))


t-test:
Uncalibrated:
Ttest_relResult(statistic=4.8429562701978153, pvalue=2.5739842010844674e-06)
================
ranksum-test:
Uncalibrated:
RanksumsResult(statistic=2.4096071012403937, pvalue=0.01596970805061913)
================
Binomial:
5.67196870216e-25

Statistical Tests - Case Accuracy (last century)


In [53]:
# Output stats
print("t-test:")
print("Uncalibrated:")
print(scipy.stats.ttest_rel(rf_correct_case_ts.tail(100).values,
                            dummy_correct_case_ts.tail(100).values))

print("=" * 16)
print("ranksum-test:")
print("Uncalibrated:")
print(scipy.stats.ranksums(rf_correct_case_ts.tail(100).values,
                           dummy_correct_case_ts.tail(100).values))

print("=" * 16)
print("Binomial:")
last_century = case_data["term"].drop_duplicates().sort_values().tail(100)
print(statsmodels.stats.proportion.binom_test(case_data.loc[case_data["term"].isin(last_century), "rf_correct_case"].sum(),
                                              case_data.loc[case_data["term"].isin(last_century), "rf_correct_case"].shape[0],
                                              case_data.loc[case_data["term"].isin(last_century), "dummy_correct_case"].mean(),
                                              alternative="larger"))


t-test:
Uncalibrated:
Ttest_relResult(statistic=9.9138738803607005, pvalue=1.6861497796564306e-16)
================
ranksum-test:
Uncalibrated:
RanksumsResult(statistic=5.1763193573766522, pvalue=2.2630595600568353e-07)
================
Binomial:
5.53696002821e-51

Output Data


In [55]:
# Output file data
raw_data.to_csv("../data/output/raw_docket_justice_model_growing_random_forest_1.csv.gz", compression="gzip")
case_data.to_csv("../data/output/raw_docket_case_model_growing_random_forest_1.csv.gz", compression="gzip")
feature_importance_df.to_csv("../data/output/raw_docket_features_model_growing_random_forest_1.csv.gz", compression="gzip")